
clear all
set more off

** Change path to re-run on another computer
cd ""
**


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 1) Merge price and fuel use data
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

use "Energy_prices_imputed(index)", clear

keep isoalpha2code isoalpha3code isonum deflator2010 country OECD year mdummy *consUS *PPPcons *totindex *respindex *resptotindex flag_VEPL
assert year<1995 if deflator2010==. & cou !="Taiwan, Republic of China" & cou!="Venezuela (Bolivarian Republic of)"
merge 1:m country year using "IPOL Fuel type use per sector_agregated"
assert _m!=1
drop if year<1978

gen one=1 if _m==2  // countries that are not contained in price data

bysort country: egen check=count(one)
tab check
*drop if _m==2 // countries that are not contained in price data
drop if check==2850 // countries that are NEVER contained in price data. the same as drop _m==2
// 5*15*38 (type sec year) all missing
drop check one 

drop _m


replace mdummy=0 if mdummy==.
replace mdummy=0 if mdummy==1
replace mdummy=1 if mdummy==2
label var mdummy "1 if added data, 0 otherwise"

rename mdummy flag_addprice


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
* 2) create variable weights for fuel use and EF
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

*create weights without using average
bysort country year sector: egen double ktoe_total=total(ktoe_extra), missing
gen double variable_weight_orig=ktoe_extra/ktoe_total


* Descriptive stat for justifying taking average acroos countries rather than industry weight within country
********************************************
/*
preserve

set scheme s2manual

drop if sector=="Industry" | sector=="Industry Unido" | sector=="Non-specified (industry)"
drop if year <=1994
drop if type == "Biofuels and waste" 

gen cv_s=.
gen cv_c=.

levelsof sector, local(sector) 
levelsof type, local(type)
levelsof country, local(country)

forval y=  1995/2015{
foreach s of local sector{
foreach t of local type{

sum variable_weight_orig if sector=="`s'" & type=="`t'" & year==`y'
replace cv_s = r(sd)/r(mean) if sector=="`s'" & type=="`t'" & year==`y' & r(N)>2
}
}
}

forval y=  1995/2015{
foreach c of local country{
foreach t of local type{

sum variable_weight_orig if country=="`c'" & type=="`t'" & year==`y'
replace cv_c = r(sd)/r(mean) if country=="`c'" & type=="`t'" & year==`y'  & r(N)>2
}
}
}

label var cv_s "CV for fuel type across countries within sector-year"
label var cv_c "CV for fuel type across sectors within country-year"


histogram cv_s, start(0.1)
graph export "Graphs/justifying averages across countries for fuel data/histogram within sector.pdf", replace

histogram cv_c,  start(0.)
graph export "Graphs/justifying averages across countries for fuel data/historgram within country.pdf", replace

tabstat cv*, statistics(mean sd p1 p25 p75 p99)
ttest cv_s==cv_c
sdtest cv_s==cv_c

restore 
*/
********************************************


*replace weights with industry average weight within country
********************************************
g weighttemp=variable_weight_orig

bysort country type year: egen double tempw=mean(variable_weight_orig) if sector=="Industry"
bysort country type year: egen double industry_weight=mean(tempw)


gen double temp=1 if variable_weight_orig==. & industry_weight!=.
bysort country sector year: egen flag=total(temp) // how many missing. 5 = all fuels missing.

gen double temp2= industry_weight if temp!=.
bysort country year sector: egen double temp3=total(temp2), missing 
gen double temp4=1-temp3

*need to account for total amount > or < than 1
count if temp4<0 & flag==5 // all fuels missing, if all replaced by industry. is for rounding
count if temp4<0 & flag==4 // all fuels missing, if 4 replaced by industry. can be not only rounding.

*replace if all are missing  for a sector: (through time, because of ktoe extra polated).
replace industry_weight= industry_weight/temp3 if flag==5 // this is the variable weight, of the industry. here temp3 is just 1
replace variable_weight_orig= industry_weight if flag ==5

*replace if all but one are missing for a sector: replacing all, since we have no relative comparison if only one ktoe in that sector.
count if temp4<0 & flag==4
count if flag==4

bysort country year sector: egen double temp3a=total(industry_weight) if flag==4, missing 
replace industry_weight= industry_weight/temp3a if flag==4
replace variable_weight_orig= industry_weight if flag ==4

*scale existing weights so add up to one and replace *remaining* missings with averages. now all weights add up to one
bysort country sector year: egen check=total(variable_weight_orig), missing
sum check
drop check
replace variable_weight_orig= variable_weight_orig* temp4 if temp4!=. & flag<4 // so for flag==0 not used, since temp4==. so weight is downscaled so rest of industry weights are filling up the remaining weights
replace variable_weight_orig= industry_weight if variable_weight_orig==. 

* check for consistency
count if variable_weight_orig<=0
count if variable_weight_orig>=1
count if variable_weight_orig==.
assert variable_weight_orig<=1 & variable_weight_orig>=0 if variable_weight_orig!=.

bysort country sector year: egen check=total(variable_weight_orig)
bysort country sector year: egen check2=count(variable_weight_orig)
drop check check2

bysort country year sector: egen vartot=total(variable_weight_orig), missing

tab vartot

recast float variable_weight_orig, force
format %9.0g variable_weight_orig


drop ktoe_type ktoeipol cc_avg ktoe_extra ktoe_total industry_weight tempw temp  temp2 temp3 temp4 temp3a vartot

rename flag flag_weight_avg_repl

*check flag
tab flag_weight_avg_repl
g one=1 if variable_weight_orig!=. & weighttemp==.
bysort country sector year: egen temp=count(one)
tab temp flag_weight_avg_repl
assert temp==flag_weight_avg_repl

label var flag_weight_avg_repl "Number of sectoral fuel weights replace weights by industry weights"
replace flag_weight_avg_repl=5 if flag_weight_avg_repl==4 // because biofuels otherwise have 5 fuels replaced might be misleading..

drop temp

*replace EF with industry average EF within country
***********************************************************
bysort country type year: egen double tempw=mean(EF_extra) if sector=="Industry"
bysort country type year: egen double industry_EF=mean(tempw)
tab year if EF_extra==. & industry_EF!=.
tab cou if EF_extra==. & industry_EF!=.
tab sec if EF_extra==. & industry_EF!=.

replace EF_extra=industry_EF if EF_extra==. & industry_EF!=. // & sec!="Industry Unido" now also do it for ind unido

assert EF_extra!=. if industry_EF!=. // & sec!="Industry Unido"
drop tempw industry_EF

*$$$$$$$$$$$$$$$$$$$$$$$$$$$
*3) Create inflation dummy
*$$$$$$$$$$$$$$$$$$$$$$$$$$$

egen id=group(country sector type)
xtset id year
gen inflation=(deflator2010-L.deflator2010)/L.deflator2010

drop id deflator2010

label var inflation "Inflation (from GDP-deflator)"

order inflation, after(year)

*$$$$$$$$$$$$$$$$$$$$$$$$$$$
*4) New flag for energy use on cou sec year level
*$$$$$$$$$$$$$$$$$$$$$$$$$$$

tab flag_weight_avg_repl flag_ktoe
g flag=-2 if flag_ktoe==2 & flag_weight_avg_repl==0
replace flag=-1 if flag_ktoe==1 & flag_weight_avg_repl==0
replace flag=0 if flag_ktoe==0 & flag_weight_avg_repl==0
replace flag=1 if flag_weight_avg_repl==1
replace flag=2 if flag_weight_avg_repl==2
replace flag=3 if flag_weight_avg_repl==3
replace flag=5 if flag_weight_avg_repl==5
rename flag flag_euse
label var flag_euse "0=all obs., -2=extrap., -1=interp., 1-5=# sec. fuel weights repl. by ind. w."
//  so for -2-0 no underlying observation is replaced by industry weights, above 0 could also be based on polated and replaced weights


isid cou sec year type

assert year<1996 if inflation==.  & cou !="Taiwan, Republic of China" & cou!="Venezuela (Bolivarian Republic of)"
assert OECD!=.

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 5) Create new weights without biofuel
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

gen temp= variable_weight_orig  if  type=="Biofuels and waste"
bysort country year sector: egen biofuel_share=mean(temp)
replace biofuel_share=0 if biofuel_share==.  //China, Kaz, Turkey never biofules, so take zero

drop if type=="Biofuels and waste"

gen variable_weight= variable_weight_orig/(1-biofuel_share)
replace variable_weight= variable_weight_orig if biofuel_share==.

drop temp



*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***6) Compute VEPL for values where amended with real RESPECTIVE index and on top of it with one of the more aggregate indices
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

gen temp=.
replace temp= oil_consUS_resptotindex*variable_weight if type=="Oil products" 
replace temp= gas_consUS_resptotindex*variable_weight if type=="Natural gas"
replace temp= coal_consUS_resptotindex*variable_weight if type=="Coal"
replace temp= electricity_consUS_resptotindex*variable_weight if type=="Electricity and heat"

bysort country year sector: egen VEPL_MER =total(temp) if temp!=., missing
bysort country year sector: egen flag=count(temp)
replace VEPL_MER =. if flag!=4


drop temp flag

gen tempPPP=.
replace tempPPP= oil_PPPcons_resptotindex*variable_weight if type=="Oil products" 
replace tempPPP= gas_PPPcons_resptotindex*variable_weight if type=="Natural gas"
replace tempPPP= coal_PPPcons_resptotindex*variable_weight if type=="Coal"
replace tempPPP= electricity_PPPcons_resptotindex*variable_weight if type=="Electricity and heat"

bysort country year sector: egen VEPL_PPP =total(tempPPP) if tempPPP!=., missing
bysort country year sector: egen flag=count(tempPPP)
replace VEPL_PPP =. if flag!=4

drop tempPPP flag
replace VEPL_PPP =. if cou=="Taiwan, Republic of China"


tempfile vepldata
save `vepldata'






*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
**7) Create table 7
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

keep country year oil_consUS_resptotindex gas_consUS_resptotindex coal_consUS_resptotindex electricity_consUS_resptotindex sector type variable_weight

replace type="coal" if type=="Coal"
replace type ="electricity" if type=="Electricity and heat"
replace type= "gas" if type=="Natural gas"
replace type = "oil"  if type =="Oil products"

reshape wide variable_weight, i(country year oil_consUS_resptotindex gas_consUS_resptotindex coal_consUS_resptotindex electricity_consUS_resptotindex sector) j(type) string

keep if year>1994

foreach x in oil gas coal electricity{
gen comp_`x'=`x'_consUS_resptotindex*variable_weight`x'
}

gen WAPL=comp_oil+ comp_gas+ comp_coal+ comp_electricity

foreach x in oil gas coal electricity{
gen share_`x'=comp_`x'/WAPL
}


drop if sector=="Industry" | sector=="Non-specified (industry)" | sector=="Industry Unido"
*scatter share_electricity year if cou=="Japan", by(sector)
tab sec

drop if share_oil==.

tabstat share_oil share_gas share_coal share_electricity, by(country) statistics(mean)

tabout sector using  "Graphs/some descriptive statistics/raw price evolution few countries/table4.tex", cells(mean share_oil mean share_gas mean share_coal mean share_electricity) ///
replace sum style(tex) format(2)






*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
**7) Add imputed prices for FEPI
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
use "Energy_prices_imputed(index)_forFEPI", clear

keep isoalpha2code year oil_log_imputed coal_log_imputed gas_log_imputed electricity_log_imputed flag_FEPI 

merge 1:m isoalpha2code year using `vepldata'
drop _m
assert year<1996 if inflation==.  & cou !="Taiwan, Republic of China" & cou!="Venezuela (Bolivarian Republic of)"
assert OECD!=.


order oil_log_imputed gas_log_imputed coal_log_imputed electricity_log_imputed, before(sector)
order year, after(country)


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*8) Create fixed weights
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

forvalues i = 1995 2000 to 2010{
gen temp = variable_weight if year==`i'
bysort country sector type: egen fw_`i'=mean(temp)
drop temp
assert EF_extra!=. if fw_`i'!=. // & sec!="Industry Unido"
}

* Create simple average weights over period
***************

*only for 1995 and beyond
**********************
drop if year<1995
**********************

bysort country sector type: egen  fw_avg_95_14=mean(variable_weight)

*check if weights add to 1
bysort country sector year: egen  check=total(fw_avg_95_14)
sum check // yes works, only float point imprecision and rounding
drop check

gen tempw=variable_weight if year>1999
bysort country sector type: egen  fw_avg_00_14=mean(tempw)
drop tempw


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*9) Create new prices including hypothetical carbon taxes
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

tab year if  EF!=EF_extra & (year==2010 | year==2013)
// these were had no toe in raw data for some sectors/years/countries

tabstat EF_extra if year==2010, stat(mean p10 median p90)  by(type)

*use year 2010 EF for FEPIfw2010. Add tax to price, create only for one year:
//fixed weights, since otherwise EF vary by year

local i =2010
gen temp = EF_extra if year==`i'
bysort country sector type: egen fwEF_`i'=mean(temp)
drop temp
assert fwEF_`i'!=. if fw_`i'!=. // & sec!="Industry Unido"

forvalues taxlevel=1/100{
gen double oil_log_wtax_`taxlevel'=log(exp(oil_log_imputed)+fwEF_2010*`taxlevel'/1000) if type=="Oil products" // EF_extra instead of  fwEF_2010 for variable EF . have prices per toe so need to divide by 1000
gen double gas_log_wtax_`taxlevel'=log(exp(gas_log_imputed)+fwEF_2010*`taxlevel'/1000) if type=="Natural gas"
gen double coal_log_wtax_`taxlevel'=log(exp(coal_log_imputed)+fwEF_2010*`taxlevel'/1000) if type=="Coal"
gen double electricity_log_wtax_`taxlevel'=log(exp(electricity_log_imputed)+fwEF_2010*`taxlevel'/1000) if type=="Electricity and heat"
}



*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
**10) Calculate FEPI where all underlying fuel prices (may be imputed) are required
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

*A) with fixed weights in a particular year
************************
forvalues i = 1995 2000 to 2010{
gen double temp=.
replace temp= oil_log_imputed*fw_`i' if type=="Oil products"
replace temp= gas_log_imputed* fw_`i' if type=="Natural gas"
replace temp= coal_log_imputed* fw_`i' if type=="Coal"
replace temp= electricity_log_imputed* fw_`i' if type=="Electricity and heat"

bysort country sector year: egen FEPI_fw_`i'=total(temp), missing
bysort country year sector: egen flag=count(temp)
replace FEPI_fw_`i' =. if flag!=4

drop temp flag
}


*B) with simple average weights
************************
foreach i in _95_14 _00_14{
gen double temp=.
replace temp= oil_log_imputed*fw_avg`i' if type=="Oil products"
replace temp= gas_log_imputed* fw_avg`i' if type=="Natural gas"
replace temp= coal_log_imputed* fw_avg`i' if type=="Coal"
replace temp= electricity_log_imputed* fw_avg`i' if type=="Electricity and heat"

bysort country sector year: egen FEPI_fw_avg`i'=total(temp), missing
bysort country year sector: egen flag=count(temp)
replace FEPI_fw_avg`i' =. if flag!=4

drop temp flag
}


*C) with carbon taxes
************************
foreach i in 2010{
forvalues taxlevel=1/100 {
gen double temp=.
replace temp= oil_log_wtax_`taxlevel'*fw_`i' if type=="Oil products"
replace temp= gas_log_wtax_`taxlevel'* fw_`i' if type=="Natural gas"
replace temp= coal_log_wtax_`taxlevel'* fw_`i' if type=="Coal"
replace temp= electricity_log_wtax_`taxlevel'* fw_`i' if type=="Electricity and heat"

bysort country sector year: egen FEPI_fw_`i'_tax_`taxlevel'=total(temp), missing
bysort country year sector: egen flag=count(temp)
replace FEPI_fw_`i'_tax_`taxlevel' =. if flag!=4

drop temp flag
}
}


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*11) JUST THE CABON TAXES FOR VEPL
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

foreach i in 2010{
forvalues taxlevel=1/100 {
gen double temp=.
replace temp= fwEF_2010*`taxlevel'/1000*fw_`i' if type=="Oil products"
replace temp= fwEF_2010*`taxlevel'/1000* fw_`i' if type=="Natural gas"
replace temp= fwEF_2010*`taxlevel'/1000* fw_`i' if type=="Coal"
replace temp= fwEF_2010*`taxlevel'/1000* fw_`i' if type=="Electricity and heat"

bysort country sector year: egen VEPL_fw_`i'_justtax_`taxlevel'=total(temp), missing
bysort country year sector: egen flag=count(temp)
replace  VEPL_fw_`i'_justtax_`taxlevel' =. if flag!=4

drop temp flag
}
}


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*12) Calculate FEPI with a threshold of minimum energy share to be included, so can omit some non-important fuels when missing prices
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

gen miss=0
replace miss =1 if oil_log_imputed==. & type=="Oil products"
replace miss =1 if gas_log_imputed ==. & type=="Natural gas"
replace miss =1 if coal_log_imputed ==. & type=="Coal"
replace miss =1 if electricity_log_imputed ==. & type=="Electricity and heat"

foreach i in 1995 2000 2005 2010 avg_95_14 avg_00_14{
	gen check_`i'=fw_`i' if miss==1
	bysort country sector year: egen check_tot_`i'=total(check_`i'), missing


*for different thresholds, check which prices to be left out for consistency in country sector over years, then recalculate FEPI
forvalues x = 0/34{

	gen temp2=1 if miss==1 & check_tot<=`x'/100 & check_tot!=. & year>=1995
	bysort country sector type: egen leftout=total(temp2)
	
	* recalculate FEPI without components that are left out at any point for consistency
	gen double oil_temp=oil_log_imputed
	gen double gas_temp=gas_log_imputed
	gen double coal_temp=coal_log_imputed
	gen double electricity_temp=electricity_log_imputed
	replace oil_temp=. if leftout!=0 & type=="Oil products"
	replace gas_temp=. if leftout!=0 & type=="Natural gas"
	replace coal_temp=. if leftout!=0 & type=="Coal"
	replace electricity_temp=. if leftout!=0 & type=="Electricity and heat"

	gen double temp=.
	replace temp= oil_temp*fw_`i' if type=="Oil products"
	replace temp= gas_temp* fw_`i' if type=="Natural gas"
	replace temp= coal_temp* fw_`i' if type=="Coal"
	replace temp= electricity_temp* fw_`i' if type=="Electricity and heat"
	
	*recalculate total share of missing
	bysort country sector year: egen FEPI_`x'_fw_`i'=total(temp), missing
	gen check_`x'_`i'=fw_`i' if temp==.
	bysort country sector year: egen check_`x'_tot_`i'=total(check_`x'_`i'), missing

	*FEPI to missing if total share above threshold
	replace FEPI_`x'_fw_`i' =. if check_`x'_tot_`i'>=`x'/100 & check_`x'_tot_`i'!=.
	
	drop temp2 temp leftout oil_temp gas_temp coal_temp electricity_temp
	
	*can check if want to:
	drop check_`x'_tot_`i' check_`x'_`i'
	
	* For graphing the threshold
	egen c_`i'_`x'=count(FEPI_`x'_fw_`i')
	
}

drop check_`i' check_tot_`i'

}



***FOR CARBON TAXES:
forvalues taxlevel=1/100{
foreach i in 2010{
	gen check_`i'=fw_`i' if miss==1
	bysort country sector year: egen check_tot_`i'=total(check_`i'), missing

*for different thresholds, check which prices to be left out for consistency in country sector over years, then recalculate FEPI
// the ktoe as well as the EF have been replaced with industry averages.
foreach x in 12{

	gen temp2=1 if miss==1 & check_tot<=`x'/100 & check_tot!=. & year>=1995
	bysort country sector type: egen leftout=total(temp2)
	
	* recalculate FEPI without components that are left out at any point for consistency
	gen double oil_temp=oil_log_wtax_`taxlevel'
	gen double gas_temp=gas_log_wtax_`taxlevel'
	gen double coal_temp=coal_log_wtax_`taxlevel'
	gen double electricity_temp=electricity_log_wtax_`taxlevel'
	replace oil_temp=. if leftout!=0 & type=="Oil products"
	replace gas_temp=. if leftout!=0 & type=="Natural gas"
	replace coal_temp=. if leftout!=0 & type=="Coal"
	replace electricity_temp=. if leftout!=0 & type=="Electricity and heat"

	gen double temp=.
	replace temp= oil_temp*fw_`i' if type=="Oil products"
	replace temp= gas_temp* fw_`i' if type=="Natural gas"
	replace temp= coal_temp* fw_`i' if type=="Coal"
	replace temp= electricity_temp* fw_`i' if type=="Electricity and heat"
	
	*recalculate total share of missing
	bysort country sector year: egen FEPI_`x'_fw_`i'_tax_`taxlevel'=total(temp), missing
	gen check_`x'_`i'=fw_`i' if temp==.
	bysort country sector year: egen check_`x'_tot_`i'=total(check_`x'_`i'), missing

	*FEPI to missing if total share above threshold
	replace FEPI_`x'_fw_`i'_tax_`taxlevel' =. if check_`x'_tot_`i'>=`x'/100 & check_`x'_tot_`i'!=.
	
	drop temp2 temp leftout oil_temp gas_temp coal_temp electricity_temp
	
	*can check if want to:
	drop check_`x'_tot_`i' check_`x'_`i'
	
}

drop check_`i' check_tot_`i'

}
}

drop miss


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*13) Create figure 10
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*Generate graphs to justify threshold
***********************************
set scheme s2manual

foreach i in 2010 {
preserve
keep c_`i'_*
duplicates drop
gen id=1
reshape long c_`i'_ , i(id) j(threshold)
replace c_`i'_=c_`i'_/4
label var threshold "Threshold for missing weight in %"
twoway line c_`i'_ threshold, sort xlabel(0(2)34,grid)
graph export "Graphs/choosing FEPI threshold/FEPI_fw_`i'.pdf", replace
gen c_`i'_growth=(c_`i'_-c_`i'_[_n-1])/c_`i'_[_n-1]
label var c_`i'_growth "Growth in observations in %"
replace c_`i'_growth=c_`i'_growth*100
twoway line c_`i'_growth threshold, sort xlabel(0(2)34,grid) graphregion(color(white))
graph export "Graphs/choosing FEPI threshold/FEPI_fw_`i'_growth.pdf", replace
restore
}

drop c_*
***********************************


compress


forvalues i = 1995 2000 to 2010{
label var FEPI_fw_`i' "Index of log prices in cou-sec with fixed sector weights `i'"
}


label var FEPI_fw_avg_95_14 "Index of log prices in cou-sec with fixed sector weights avg 1995-2014"
label var FEPI_fw_avg_00_14 "Index of log prices in cou-sec with fixed sector weights avg 2000-2014"




*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*14)a Create figure 15
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


gen weight_oil  = variable_weight if type=="Oil products"
gen weight_gas  = variable_weight if type=="Natural gas"
gen weight_coal = variable_weight if type=="Coal"
gen weight_electricity = variable_weight if type=="Electricity and heat"

label var weight_oil "Oil"
label var weight_gas "Natural gas"
label var weight_coal "Coal"
label var weight_electricity "Electricity"

preserve
drop if sec=="Industry" | sector=="Non-specified (industry)" | sector=="Industry Unido"
keep if cou=="Russian Federation" | cou =="Italy" | cou=="Japan" | cou=="Sweden"

*levelsof sector, local(sect)
*foreach sec of local sect{
local sec "Chemical and petrochemical"
graph twoway scatter weight_oil weight_gas weight_coal weight_elec year if sector=="`sec'", /*
*/  ytitle("Fuel weight") by(country ,graphregion(color(white)) note(""))  ylabel(#5, angle(horizontal)) connect(direct direct direct direct) lcolor(gs4 gs8 gs6 gs2)
graph export "Graphs/some descriptive statistics/weights evolution example fuel switching/`sec'.pdf", replace
*}
restore



*$$$$$$$$$$$$$$$$$$$$$$$$$
*14b) Figure 4 and 5 and Table 8
*$$$$$$$$$$$$$$$$$$$$$$$$$

preserve 
keep if sector == "Industry"
drop if flag_euse == -2

rename VEPL_MER VEPL
keep if !missing(VEPL)
replace country = "Taiwan" if country == "Taiwan, Republic of China"
replace country = "USA" if country == "United States of America"
replace country = "South Korea" if country == "Korea, Republic of"
replace country = "South Africa" if country == "South Africa"
replace country = "Czech Rep." if country == "Czech Republic"

tab type
gen fuel = "elec"
replace fuel = "coal" if type == "Coal"
replace fuel = "gas" if type == "Natural gas"
replace fuel = "oil" if type == "Oil products"

drop sector type
rename variable_weight weight

keep weight country year  oil_consUS_resptotindex gas_consUS_resptotindex coal_consUS_resptotindex electricity_consUS_resptotindex VEPL fuel

reshape wide weight, i(country year oil_consUS_resptotindex gas_consUS_resptotindex coal_consUS_resptotindex electricity_consUS_resptotindex VEPL) j(fuel) string
rename oil_consUS_resptotindex priceoil
rename gas_consUS_resptotindex pricegas
rename coal_consUS_resptotindex pricecoal
rename electricity_consUS_resptotindex priceelec
reshape long price weight, i(country year VEPL) j(fuel) string

egen countryfuel = group(country fuel)
xtset countryfuel year
gen weight_hat = D.weight
gen price_hat = D.price
gen VEPL_hat = D.VEPL
sum VEPL_hat weight_hat price_hat

gen element_weight = price * weight_hat
bysort country year: egen fuelcomp = total(element_weight)
gen element_price = weight * price_hat
bysort country year: egen pricecomp = total(element_price)

bysort country year: gen unique = _n == 1
keep if unique == 1
keep country year pricecomp fuelcomp VEPL_hat VEPL
br

drop if year < 1998
gen byte is1998 = 0
replace is1998 = 1 if year == 1998
gen ccount = 1
bysort country: egen nbnmis = total(ccount)
tab nbnmis
sort country year
gen VEPL_priceonly = VEPL - fuelcomp

foreach xxx of varlist VEPL VEPL_priceonly {
	bysort country: egen `xxx'1998 = total(`xxx' * is1998)
	replace `xxx'1998 = . if `xxx'1998 == 0
	gen rel`xxx' = `xxx' / `xxx'1998
}
keep if !missing(relVEPL)

label variable relVEPL "Actual VEPL"
label variable relVEPL_priceonly "Without fuel switching"
label variable VEPL_hat "VEPL"
label variable pricecomp "Price component"
label variable fuelcomp "Fuel component"

*** GENERATING FIGURE 4 AND 5 ***
gen selected = 0
gen selected2 = 0
replace selected2 = 1 if country == "Turkey"
replace selected2 = 1 if country == "Czech Rep."
replace selected = 1 if country == "Denmark"
replace selected = 1 if country == "Japan"
sum VEPL_hat pricecomp fuelcomp, det

set scheme s2color
** Figure 4 **
twoway connected VEPL_hat pricecomp fuelcomp year if selected2 == 1, legend(col(3)) ylabel(, gmin gmax grid angle(horizontal)) ///
xtitle("") ytitle("Variation in USD / toe") xlabel(1998(1)2014, gmin gmax grid angle(vertical)) /// 
by(country, rescale rows(1)  graphregion(color(white)) bgcolor(white) note("")) yline(0, lcolor(black)) graphregion(color(white)) bgcolor(white)
graph export "Graphs/some descriptive statistics/sourcevariation/Figure4.pdf", replace


** Figure 5 **
twoway connected VEPL_hat pricecomp fuelcomp year if selected == 1, legend(col(3)) ylabel(, gmin gmax grid angle(horizontal)) ///
xtitle("") ytitle("Variation in USD / toe") xlabel(1998(1)2014, gmin gmax grid angle(vertical)) /// 
by(country, rescale rows(1) graphregion(color(white)) note("")) yline(0, lcolor(black))  graphregion(color(white)) bgcolor(white)
graph export "Graphs/some descriptive statistics/sourcevariation/Figure5.pdf", replace

set scheme s2manual


*** GENERATING TABLE 8 ***

gen sharepriceyear = 100 * abs(pricecomp) / (abs(pricecomp) + abs(fuelcomp))

order country year fuelcomp pricecomp sharepriceyear
gen firstperiod = 0
foreach xxx in 1998 1999 2000 2001 2002 {
replace firstperiod = 1 if year == `xxx'
}
gen second = 0
foreach xxx in 2003 2004 2005 2006 2007 2008 {
replace second = 1 if year == `xxx'
}
gen third = 0
replace third = 1 if year > 2009
gen notmissing = 0
replace notmissing = 1 if !missing(sharepriceyear)

bysort country: egen mean_shareprice = mean(sharepriceyear)
foreach xxx in firstperiod second third {
	bysort country: egen tot`xxx' = total(sharepriceyear * `xxx')
	bysort country: egen nb`xxx' = total(notmissing * `xxx')
	gen avg`xxx' = tot`xxx' / nb`xxx'
}

order country avg* mean_shareprice
bysort country: gen uniquec = _n == 1
gsort -mean_shareprice
keep country avgfirstperiod avgsecond avgthird mean_shareprice 
duplicates drop

restore


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*15) Collapse data
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


foreach i in 1995 2000 2005 2010 avg_95_14 avg_00_14{
rename FEPI_12_fw_`i'  FEPI12_fw`i'
rename FEPI_fw_`i'  FEPI_fw`i' 
}
foreach i in  2010 {
rename FEPI_12_fw_`i'_*  FEPI12_fw`i'_*
}

keep isoalpha2code isoalpha3code isonum OECD country year inflation  sector biofuel_share VEPL_MER VEPL_PPP   FEPI_fw1995 FEPI_fw2000 FEPI_fw2005 FEPI_fw2010 FEPI_fwavg_95_14 FEPI_fwavg_00_14 FEPI12_fw1995 FEPI12_fw2000 FEPI12_fw2005 FEPI12_fw2010 FEPI12_fwavg_95_14 FEPI12_fwavg_00_14 ///
FEPI12_fw2010_tax_* ///
flag* flag_addprice VEPL_fw_2010_justtax*


order isoalpha2code isoalpha3code isonum  OECD country year inflation sector biofuel_share flag_euse VEPL_MER VEPL_PPP flag_VEPL FEPI_fw1995 FEPI_fw2000 FEPI_fw2005 FEPI_fw2010 flag_FEPI FEPI12_fw1995 FEPI12_fw2000 FEPI12_fw2005 FEPI12_fw2010

duplicates drop

*Drop countries that have no available data at all in any index:
bysort country: egen check=total(VEPL_MER), missing
bysort country: egen check1=total(FEPI_fw1995), missing
bysort country: egen check2=total(FEPI_fw2000), missing
bysort country: egen check3=total(FEPI_fw2005), missing
bysort country: egen check4=total(FEPI_fw2010), missing
bysort country: egen check5=total(FEPI_fwavg_95_14), missing
bysort country: egen check6=total(FEPI_fwavg_00_14), missing

bysort country: egen check1b=total(FEPI12_fw1995), missing
bysort country: egen check2b=total(FEPI12_fw2000), missing
bysort country: egen check3b=total(FEPI12_fw2005), missing
bysort country: egen check4b=total(FEPI12_fw2010), missing
bysort country: egen check5b=total(FEPI12_fwavg_95_14), missing
bysort country: egen check6b=total(FEPI12_fwavg_00_14), missing

egen aa=rowmiss(check check1 check2 check3 check4 check5 check6 check1b check2b check3b check4b check5b check6b)
drop if aa==13
drop aa check*

drop if sector=="Non-specified (industry)"



drop isonum isoalpha2code 
order isoalpha3code country OECD year inflation  sector biofuel_share flag_euse flag_addprice flag_VEPL flag_FEPI VEPL_MER VEPL_PPP FEPI12_fw1995 FEPI12_fw2000 FEPI12_fw2005 FEPI12_fw2010 FEPI12_fwavg_95_14 FEPI12_fwavg_00_14 FEPI_fw1995 FEPI_fw2000 FEPI_fw2005 FEPI_fw2010 FEPI_fwavg_95_14 FEPI_fwavg_00_14

rename FEPI_* FEPI_allfuels_*
rename FEPI12* FEPI*




*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*16)  Create figure 9
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


preserve
isid cou year sec

*** Graph using range of sectors:
***FOR VEPL

foreach year in 2010{
foreach taxlevel in 30{

*sorting:
gen temp=VEPL_MER if sec=="Industry"
bysort cou year: egen vepllab=max(temp)

drop if sec=="Industry" | sec=="Manufacturing" | sec=="Non-specified (industry)"
bysort country year: egen counonmis=count(VEPL_fw_2010_justtax_`taxlevel')
tab counonmis
keep if counonmis==12

bysort country year: egen maxd=max(VEPL_fw_2010_justtax_`taxlevel')
bysort country year: egen mind=min(VEPL_fw_2010_justtax_`taxlevel')
keep if year==`year'


*sorting:
replace country="USA" if country=="United States of America"
replace country="Korea" if country=="Korea, Republic of"
replace country="Venezuela" if country=="Venezuela (Bolivarian Republic of)"
replace country="Taiwan" if country=="Taiwan, Republic of China"
replace country="Russia" if country==" Russian Federation"

sort vepllab
egen cou_num=group(vepllab)
labmask cou_num , values(country) 

*so can add dots:
g temp2=.
replace temp2=VEPL_fw_2010_justtax_`taxlevel' if VEPL_fw_2010_justtax_`taxlevel'==maxd | VEPL_fw_2010_justtax_`taxlevel'==mind


keep  country sec maxd mind vepllab cou_num temp2 VEPL_fw_2010_justtax_`taxlevel'
sort VEPL_fw_2010_justtax_`taxlevel' country sec
duplicates drop  country VEPL_fw_2010_justtax_`taxlevel', force

*for displaying labels:

gen sec=""
replace sec="I&S" if sector=="Iron and steel"
replace sec="W&WP" if sector=="Wood and wood products"
replace sec="NMM" if sector=="Non-metallic minerals"
replace sec="TE" if sector=="Transport equipment"
replace sec="M&Q" if sector=="Mining and quarrying"
replace sec="T&L" if sector=="Textile and leather"
replace sec="C&P" if sector=="Chemical and petrochemical"
replace sec="PPP" if sector=="Paper, pulp and print"
replace sec="NFM" if sector=="Non-ferrous metals"
replace sec="C" if sector=="Construction"
replace sec="M" if sector=="Machinery"
replace sec="F&T" if sector=="Food and tobacco"
generate pos = 12
replace pos = 6 if mind==VEPL_fw_2010_justtax_`taxlevel'

replace sec="" if temp2==.

stripplot VEPL_fw_2010_justtax_`taxlevel', vertical over(country) msymbol(D) msize(0.6) mlabel(sec) mlabsize(*0.6) mcolor(gs11) mlabvposition(pos)  xla(, ang(v) labsize(2.3) grid)  box(bfcolor(gs11) blcolor(white) barw(0.15)  blwidth(none) ) pctile(0) whiskers(lcolor(gs11) lwidth(0.3)) xtitle("Country",size(2.5)) ytitle("$-increase in VEPL with `taxlevel'$ tax per tCO2",size(2.5)) ylabel(,labsize(2.5)) plotregion(margin(l-1)) plotregion(margin(r-1)) yscale(range(300)) ylabel(0(50)300) graphregion(fcolor(white) lcolor(white))  
*graph export "Graphs/some descriptive statistics/carbon tax/stripplot_wmarks_VEPL_`taxlevel'_`year'.pdf", replace
drop temp2
restore
}
}



drop VEPL_fw_2010_justtax*


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*17) Create delta for FEPI for carbon taxes
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

forvalues i=1/100{
g FEPI_tax_`i'=FEPI_fw2010_tax_`i'-FEPI_fw2010
g test_`i'=exp(FEPI_fw2010_tax_`i')-exp(FEPI_fw2010)
}
*br year sec FEPI_fw2010 FEPI_12_fw_2010_tax_25 FEPI_12_fw_2010_tax_40 test_25 test_40  if   cou=="South Africa" & (sec=="Mining and quarrying" | sec=="Wood and wood products")

drop test_*


** Check correlation of delta
preserve 
keep cou sec year FEPI_tax_25
reshape wide FEPI_tax_25,i(cou sec) j(year)
pwcorr FEPI*,sig
*by cou: pwcorr FEPI_tax_252003 FEPI_tax_252011,sig 
*pwcorr FEPI* if cou=="Austria",sig
restore

*keep tax level 10 to 50
*only based on 2010. nonlinear, so can change slightly depending which base year take. for VEPL will be same no matter which base year:
foreach i in 10 20 30 40 50{
g temp=FEPI_tax_`i' if year==2008
bysort cou sec: egen tFEPI_tax_`i'=mean(temp)
drop temp
assert  FEPI_tax_`i'==tFEPI_tax_`i' if year==2008
}

drop FEPI_tax_* FEPI_fw2010_tax_*
rename tFEPI_tax_* FEPI_fw2010_tax_*


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*18) Final cleaning and save
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


replace sector="Manufacturing" if sector=="Industry Unido"
sort country sector year

*label variables:
label var isoalpha3code "ISO country code"
label var OECD "OECD membership"
label var biofuel_share "Share of biofuel in original underlying sector energy mix"
label var VEPL_MER "Variable weight Energy Price Level, constant 2010US$, market exchange rates"
label var VEPL_PPP "Variable weight Energy Price Level, constant 2010US$, purchasing power parity"
label var VEPL_MER "Variable weight Energy Price Level, constant 2010US$, market exchange rates"
label var VEPL_PPP "Variable weight Energy Price Level, constant 2010US$, purchasing power parity"
label var sector "Sector, all manufacturing or all industry"
label var year "Year"

foreach x in 1995 2000 2005 2010 avg_95_14 avg_00_14{
label var FEPI_fw`x' "Fixed (in `x') weight Energy Price Index, real"
}
foreach x in 1995 2000 2005 2010 avg_95_14 avg_00_14{
label var FEPI_allfuels_fw`x' "Fixed (in `x') weight Energy Price Index, real, no fuels omitted"
}

foreach x in 10 20 30 40 50{
label var FEPI_fw2010_tax_`x' "FEPI_fw2010 price increase from a `x'$/tCO2 carbon tax"
}


drop flag_ktoe flag_weight_avg_repl


save "VEPL_FEPI_data.dta", replace
